
*****************************************************
*           REGRESSIONSANALYSE                      *
*              Josef Brüderl                        *
*               Januar 2019                         *
*****************************************************

*****************************************************
* Das logistische Regressionsmodell
*****************************************************

* Session setup: pin the Stata version first, switch off output paging,
* and start with empty data memory
version 15
set more off
clear

* Data: ALLBUS 2002, loaded from the lecture's do-file directory
cd "C:\Users\bruederl\LRZ Sync+Share\Vorlesung Querschnittsdatenanalyse\Do Files\"
use AllbReg, clear


* ---------------------------------------
* Data preparation
* ---------------------------------------
* Restrict the sample to respondents aged 30 to 65.
* Missing values of alter sort above every number in Stata, so they satisfy
* alter>=66 and are removed as well.
drop if alter<=29 | alter>=66


******************************************
* A bivariate model
* Example: unemployment during the last 10 years
******************************************

* Jittered scatterplot of the binary outcome (arblos) against age (alter)
twoway scatter arblos alter,                                              ///
       jitter(10) msymbol(circle) msize(small) mcolor(blue)               ///
       ytitle("P(Arbeitslosigkeit)", size(large) margin(medsmall))        ///
       yscale(range(-0.1 1.1))                                            ///
       ylabel(0(0.2)1, labsize(medlarge) angle(horizontal) grid)          ///
       xtitle("Alter", size(large) margin(medsmall))                      ///
       xlabel(30(5)65, labsize(medlarge))                                 ///
       legend(off)


* Linear probability model (LPM)
regress arblos alter
generate lwm1 = _b[alter]*alter + _b[_cons]        // fitted regression line of the LPM
estimates store lwm1

* Jittered scatterplot with the linear fit overlaid
twoway (scatter arblos alter,                                             ///
            jitter(10) msymbol(circle) msize(small) mcolor(blue))         ///
       (lfit arblos alter, lwidth(thick) lcolor(red)),                    ///
       ytitle("P(Arbeitslosigkeit)", size(large) margin(medsmall))        ///
       yscale(range(-0.1 1.1))                                            ///
       ylabel(0(0.2)1, labsize(medlarge) angle(horizontal) grid)          ///
       xtitle("Alter", size(large) margin(medsmall))                      ///
       xlabel(30(5)65, labsize(medlarge))                                 ///
       legend(off) xsize(6) ysize(4)

	 
* Logistic function: plot invlogit(2*x) for x between -3 and +3
twoway (function invlogit(2*x), range(-3 +3) lwidth(thick) lcolor(blue)),  ///
       ytitle("P(Y=1)", size(large) margin(medsmall))                      ///
       ylabel(0(0.1)1, labsize(medlarge) angle(horizontal) grid            ///
              format(%3.1f))                                               ///
       xtitle("X", size(large) margin(medsmall))                           ///
       xlabel(-3(1)3, labsize(medlarge))                                   ///
       xsize(6) ysize(4)


* Logistic regression
logit arblos alter
generate log1 = invlogit(_b[alter]*alter + _b[_cons])   // fitted probability curve of the logit model
estimates store log1

* Jittered scatterplot with the fitted logistic curve overlaid
twoway (scatter arblos alter,                                             ///
            jitter(10) msymbol(circle) msize(small) mcolor(blue))         ///
       (line log1 alter, sort lwidth(thick) lcolor(red)),                 ///
       ytitle("P(Arbeitslosigkeit)", size(large) margin(medsmall))        ///
       yscale(range(-0.1 1.1))                                            ///
       ylabel(0(0.2)1, labsize(medlarge) angle(horizontal) grid)          ///
       xtitle("Alter", size(large) margin(medsmall))                      ///
       xlabel(30(5)65, labsize(medlarge))                                 ///
       legend(off) xsize(6) ysize(4)


* Comparison logit vs. LPM.
* Plots the variables lwm1 and log1, which must already exist in the data.
twoway (scatter arblos alter,                                             ///
            jitter(10) msymbol(circle) msize(small) mcolor(gs10))         ///
       (line lwm1 alter, sort lwidth(medthick) lcolor(blue))              ///
       (line log1 alter, sort lwidth(medthick) lcolor(red)),              ///
       ytitle("P(Arbeitslosigkeit)", size(large) margin(medsmall))        ///
       yscale(range(-0.1 1.1))                                            ///
       ylabel(0(0.2)1, labsize(medlarge) angle(horizontal) grid           ///
              format(%3.1f))                                              ///
       xtitle("Alter", size(large) margin(medsmall))                      ///
       xlabel(30(5)65, labsize(medlarge))                                 ///
       legend(order(2 "lineares Modell" 3 "Logit Modell")                 ///
              rows(2) position(3) ring(0))                                ///
       xsize(6) ysize(4)


* Diagnostics: scatterplot with the logit curve (variable log1, which must
* already exist) and a lowess smoother of arblos on alter for comparison
twoway (scatter arblos alter,                                             ///
            jitter(10) msymbol(circle) msize(small) mcolor(gs10))         ///
       (line log1 alter, sort lwidth(medthick) lcolor(red))               ///
       (lowess arblos alter, bwidth(0.99) lwidth(medthick) lcolor(blue)), ///
       ytitle("P(Arbeitslosigkeit)", size(large) margin(medsmall))        ///
       yscale(range(-0.1 1.1))                                            ///
       ylabel(0(0.2)1, labsize(medlarge) angle(horizontal) grid           ///
              format(%3.1f))                                              ///
       xtitle("Alter", size(large) margin(medsmall))                      ///
       xlabel(30(5)65, labsize(medlarge))                                 ///
       legend(order(2 "Logit Modell" 3 "Lowess")                          ///
              rows(2) position(3) ring(0))                                ///
       xsize(6) ysize(4)



******************************************
* A bivariate model with a discrete X
******************************************
* Cross-tabulation with column percentages and chi-squared test, then the logit
tabulate arblos ost, column chi2
logit    arblos ost

* prchange only works if the SPost ados are installed
prchange, x(ost=0) uncentered


******************************************
* A multiple model
******************************************
logit arblos bild alter frau ost          // coefficients on the logit scale
logit arblos bild alter frau ost, or      // same model, reported as odds ratios

* fitstat only works if the SPost ados are installed
fitstat


*-----------------------------------------
* Profile plots
*-----------------------------------------

* Education and age
* Predicted probabilities over age for bild=8 and bild=18.
* The age grid stops at 65: the sample was restricted to ages 30-65 during
* data preparation, so values up to 70 would be extrapolations beyond the
* estimation sample (and inconsistent with the other plots in this file).
logit arblos bild alter frau ost
margins, at(alter=(30(5)65) bild=(8 18)) noatlegend
marginsplot, noci title("")                                                 ///
   plot1opts(lwidth(thick) lcolor(red)  msymbol(i))                         ///
   plot2opts(lwidth(thick) lcolor(blue) msymbol(i))                         ///
   ytitle("P(Arbeitslosigkeit)", size(large))                               ///
   ylabel(0(0.1)1, labsize(medlarge) angle(horizontal) grid format(%3.1f))  ///
   xtitle("Alter", size(large) margin(medsmall))                            ///
   xlabel(30(5)65, labsize(medlarge))                                       ///
   legend(pos(1) ring(0) row(2) order(1 2) lab(1 "Bildung=8")               ///
          lab(2 "Bildung=18") size(medlarge))                               ///
   ysize(5)


* Occupational status and East
logit arblos bild alter frau i.ost i.beruf
margins beruf#ost              // fixes beruf and ost jointly
* NOTE(review): the legend shows keys 3 and 4 only; presumably keys 1-2 are
* the confidence-interval plots -- confirm against the rendered graph.
marginsplot, title("") plotopts(connect(none))                             ///
   ytitle("P(Arbeitslosigkeit)", size(large))                              ///
   ylabel(0(0.1)1, labsize(medlarge) angle(horizontal) grid format(%3.1f)) ///
   xtitle("berufliche Stellung", size(large) margin(medsmall))             ///
   legend(position(11) ring(0) rows(2) order(3 4)                          ///
          label(3 "Westdeutscher") label(4 "Ostdeutscher")                 ///
          size(medlarge))                                                  ///
   ysize(5)

   
*-----------------------------------------
* AMEs
*-----------------------------------------

* Marginal effects (kept for reference, not executed)
* logit arblos bild alter frau ost
* prchange, help                                          // various MEs
* margins, at((means)_all) at((asobserved)_all) dydx(*)   // compare MEMs and AMEs
* marginsplot, x(_deriv) noci horizontal xline(0) plotopts(connect(i))

* Average marginal effects / average discrete changes
logit arblos bild alter i.frau i.ost
margins, dydx(*)

* Plot with a sensible scaling: education and age are divided by 10
generate bild1  = bild/10
generate alter1 = alter/10
logit arblos bild1 alter1 i.frau i.ost
margins, dydx(*) post                    // option "post" is required for the plot below
* coefplot is a user-written command and must be installed separately.
* NOTE(review): the ylabel() texts assume the effects are plotted in the order
* bild1, alter1, frau, ost -- confirm against the rendered graph.
coefplot, xline(0)                                                         ///
    title("AMEs und 95%-KI", size(large))                                  ///
    xtitle("Effekt auf P(Arbeitslos)", size(medlarge) margin(medsmall))    ///
    ytitle("")                                                             ///
    ylabel(1 "Bildungsjahre/10" 2 "Alter/10" 3 "Frau" 4 "Ost",             ///
           labsize(medlarge))                                              ///
    xlabel(-0.35(0.05)0.35, grid labsize(medlarge))                        ///
    xsize(5) ysize(3)

* Effects in the LPM, for comparison
regress arblos bild alter frau ost

   
*------------------------------------------
* Interaction effects
*------------------------------------------

* Age and age squared (polynomial regression without interaction)
logit arblos bild frau i.ost c.alter c.alter#c.alter
* Predicted probabilities by ost at ages 30, 35, ..., 65
margins ost, at(alter=(30(5)65)) noatlegend
marginsplot, noci title("")                                                 ///
   plot1opts(lwidth(thick) lcolor(red)  msymbol(none))                      ///
   plot2opts(lwidth(thick) lcolor(blue) msymbol(none))                      ///
   ytitle("P(Arbeitslosigkeit)", size(large))                               ///
   ylabel(0(0.1)1, labsize(medlarge) angle(horizontal) grid format(%3.1f))  ///
   xtitle("Alter", size(large) margin(medsmall))                            ///
   xlabel(30(5)65, labsize(medlarge))                                       ///
   legend(position(11) ring(0) rows(2) order(1 2)                           ///
          label(1 "West") label(2 "Ost") size(medlarge))                    ///
   ysize(5)

* Slope interaction: ost##(age, age squared)
logit arblos bild frau i.ost##(c.alter c.alter#c.alter)
* Joint test of the two interaction terms
test 1.ost#c.alter 1.ost#c.alter#c.alter
* Predicted probabilities by ost at ages 30, 35, ..., 65
margins ost, at(alter=(30(5)65)) noatlegend
marginsplot, noci title("")                                                 ///
   plot1opts(lwidth(thick) lcolor(red)  msymbol(none))                      ///
   plot2opts(lwidth(thick) lcolor(blue) msymbol(none))                      ///
   ytitle("P(Arbeitslosigkeit)", size(large))                               ///
   ylabel(0(0.1)1, labsize(medlarge) angle(horizontal) grid format(%3.1f))  ///
   xtitle("Alter", size(large) margin(medsmall))                            ///
   xlabel(30(5)65, labsize(medlarge))                                       ///
   legend(position(11) ring(0) rows(2) order(1 2)                           ///
          label(1 "West") label(2 "Ost") size(medlarge))                    ///
   ysize(5)

* Conditional marginal-effects plot
* The effect of ost varies with age
logit arblos bild frau i.ost##(c.alter c.alter#c.alter)
* Effect of ost evaluated at ages 30, 32, ..., 64
margins, dydx(ost) at(alter=(30(2)65)) noatlegend
marginsplot, recast(line) recastci(rline)                                        ///
        plotopts(lwidth(thick) lcolor(blue))                                     ///
        ciopts(lcolor(blue) lpattern(dash))                                      ///
        title("Konditionaler AME von 'Ost' und 95%-KI",                          ///
              size(large) margin(medium))                                        ///
        ytitle("Differenz P(Arbeitslosigkeit) zu West", size(medlarge))          ///
        ylabel(-0.1(0.1)0.9, labsize(medlarge) angle(horizontal) grid            ///
               format(%3.1f))                                                    ///
        yline(0, lcolor(red))                                                    ///
        xtitle("Alter", size(large) margin(medsmall))                            ///
        xlabel(30(5)65, labsize(medlarge))                                       ///
        ysize(5)

		
*--------------------------------------------------------------
* Interaction with the LPM
*--------------------------------------------------------------
* Same specification as the logit slope-interaction model, fitted by OLS
regress arblos bild frau i.ost##(c.alter c.alter#c.alter)
* Joint test of the two interaction terms
test 1.ost#c.alter 1.ost#c.alter#c.alter
* Predictions by ost at ages 30, 35, ..., 65
margins ost, at(alter=(30(5)65)) noatlegend
marginsplot, noci title("")                                                 ///
   plot1opts(lwidth(thick) lcolor(red)  msymbol(none))                      ///
   plot2opts(lwidth(thick) lcolor(blue) msymbol(none))                      ///
   ytitle("P(Arbeitslosigkeit)", size(large))                               ///
   ylabel(0(0.1)1, labsize(medlarge) angle(horizontal) grid format(%3.1f))  ///
   xtitle("Alter", size(large) margin(medsmall))                            ///
   xlabel(30(5)65, labsize(medlarge))                                       ///
   legend(position(11) ring(0) rows(2) order(1 2)                           ///
          label(1 "West") label(2 "Ost") size(medlarge))                    ///
   ysize(5)

* Conditional marginal-effects plot
* The effect of ost varies with age.
* No model is fitted here: margins uses whatever estimation results are
* currently active (the preceding regress when the file is run top to bottom).
margins, dydx(ost) at(alter=(30(2)65)) noatlegend
marginsplot, recast(line) recastci(rline)                                        ///
        plotopts(lwidth(thick) lcolor(blue))                                     ///
        ciopts(lcolor(blue) lpattern(dash))                                      ///
        title("Konditionaler AME von 'Ost' und 95%-KI",                          ///
              size(large) margin(medium))                                        ///
        ytitle("Differenz P(Arbeitslosigkeit) zu West", size(medlarge))          ///
        ylabel(-0.1(0.1)0.9, labsize(medlarge) angle(horizontal) grid            ///
               format(%3.1f))                                                    ///
        yline(0, lcolor(red))                                                    ///
        xtitle("Alter", size(large) margin(medsmall))                            ///
        xlabel(30(5)65, labsize(medlarge))                                       ///
        ysize(5)


*---------------------------------------------------
* Categorical interaction: ost##beruf
*---------------------------------------------------
logit arblos bild alter frau ost##beruf
* Tests of the main effects and of the interaction
contrast ost##beruf
* Predictions for every combination of beruf and ost
margins beruf#ost
* NOTE(review): the legend shows keys 3 and 4 only; presumably keys 1-2 are
* the confidence-interval plots -- confirm against the rendered graph.
marginsplot, title("") plotopts(connect(none))                             ///
   ytitle("P(Arbeitslosigkeit)", size(large))                              ///
   ylabel(0(0.1)1, labsize(medlarge) angle(horizontal) grid format(%3.1f)) ///
   xtitle("berufliche Stellung", size(large) margin(medsmall))             ///
   legend(position(11) ring(0) rows(2) order(3 4)                          ///
          label(3 "West") label(4 "Ost") size(medlarge))                   ///
   ysize(5)

   
 *********************************************
 * Model comparison
 *********************************************

* Reduced model: education only
logit arblos bild
* Full model: adds age, sex and East
logit arblos bild alter frau ost
* KHB method (khb is a user-written command and must be installed separately)
khb logit arblos bild || alter frau ost

 
*------------------------------------------
*    Logit vs. probit
*------------------------------------------
* eststo/esttab only work if the ESTOUT package is installed.
*
* Drop estimation sets left over from earlier eststo calls in this session.
* Without this, re-running the do-file keeps the old est1/est2, stores the new
* models as est3/est4, and esttab prints more columns than the two mtitles().
eststo clear

logit arblos bild alter frau ost
eststo
probit arblos bild alter frau ost
eststo

* Write both models side by side to an RTF table (pseudo-R2, 3 decimals)
esttab using LogProb.rtf, pr2 b(%6.3f)                     ///
     mtitles("Logit" "Probit") nonumbers                   ///
     title(Vergleich von Logit- und Probit-Modell) replace


* Another model for a binary dependent variable
cloglog  arblos bild alter frau ost

